*-------------------------------------------------------------------------------
* TITLE: 1.data_creation.do
* DESCRIPTION: prepares data sets for reduced form and structural analysis
* VERSION: MP17.0
* DATE: 08/23/21 
*
* INPUT FILE: PovertyTraps_replication_data.dta
* OUTPUT FILES: PovertyTraps_analysis.dta, structural/input/PovertyTraps_structural.dta, Test1data.csv
*-------------------------------------------------------------------------------



**------------------------------------------------------------------------------
** PREPARE DATASET FOR REDUCED FORM ANALYSIS
**------------------------------------------------------------------------------
use "${Data}/PovertyTraps_replication_data.dta", clear

	* rescale productive asset values so that one unit equals 1000 BDT
replace pAssets = pAssets/1000

	* wave-1 asset value before the transfer is added (missing in all other waves);
	* must be captured before pAssets is modified below
generate k0t = cond(survey==1, pAssets, .)

	* post-transfer wave-1 assets for treated stup households: add the imputed
	* transfer value (branch-level median cow price, on the same 1000 BDT scale)
replace pAssets = pAssets + (Pcows_no/1000) if survey==1 & treat==1 & stup==1


	* generate productive assets in constant 2007 (baseline) prices
	* using branch-level median prices, so that changes reflect quantities
gen Qcows = Pcows_no * cows_no
gen Qpoultry = Ppoultry_no * poultry_no
gen Qgoat = Pgoat_no * goat_no
gen Qland = Pland_own_total_size * land_own_total_size
	* asset categories 4 to 13: price times number held (summed into QbizAssets below)
forval i=4/13 {
	gen Qasset`i' = Passet_value`i' * asset_no`i'
}
	* egen rowtotal() counts missing components as zero
egen QbizAssets = rowtotal(Qasset*)
egen QpAssets = rowtotal(Qcows Qpoultry Qgoat Qland QbizAssets)
	* same 1000 BDT scale as pAssets
replace QpAssets = QpAssets/1000

	* household-level baseline (wave 1, pre-transfer) stock at constant prices.
	* Grouping uses hhid5, the household identifier used for every other
	* household-level aggregate in this file (k0, k1 to k5, savings, age).
	* The previous spelling hhid only resolved through variable abbreviation,
	* or to a different identifier if a separate hhid variable exists.
gen Qk0t = QpAssets if survey==1
bysort hhid5: egen Qk0 = max(Qk0t)
drop Qk0t

	* add the imputed transfer value to wave-1 assets of treated stup households
	* (done after Qk0 is taken so that Qk0 stays pre-transfer)
replace QpAssets = QpAssets + (Pcows_no/1000) if survey==1 & treat==1 & stup==1


	* household-level cross-sectional variables: the asset value, respondent age
	* and savings observed in each survey round are copied to every row of the household

		* pre-transfer baseline assets
bysort hhid5: egen k0 = max(k0t)
drop k0t

		* one set of variables per survey round 1 to 5 (2007, 2009, 2011, 2014, 2018)
forvalues wave = 1/5 {

		* round-specific helper values, missing outside that round
	generate k`wave't   = pAssets  if survey==`wave'
	generate Qk`wave't  = QpAssets if survey==`wave'
	generate age`wave't = resp_age if survey==`wave'
	generate sav`wave't = savings  if survey==`wave'

		* spread the round value across all rows of the household
	bysort hhid5: egen k`wave'       = max(k`wave't)
	bysort hhid5: egen Qk`wave'      = max(Qk`wave't)
	bysort hhid5: egen savings`wave' = max(sav`wave't)
	bysort hhid5: egen age`wave'     = max(age`wave't)

	drop k`wave't Qk`wave't age`wave't sav`wave't
}
	
	* trim the upper tail of asset values within each wealth class and survey round:
	* values falling in percentile groups 99 and 100 (out of 100) are set to missing
	* NOTE(review): the section was originally described as trimming the top 1 percent,
	* but the >=99 cut-off removes two of the 100 groups - confirm which is intended

		* baseline assets pre-transfer (k0, Qk0), percentiles taken on wave-1 rows
forvalues cls = 1/4 {
	foreach v in k0 Qk0 {
		xtile perc`v'`cls' = `v' if survey==1 & sclass==`cls', nq(100)
		replace `v' = . if perc`v'`cls'>=99 & sclass==`cls' & survey==1
		drop perc`v'`cls'
	}
}

		* survey waves 1 to 4 (2007, 2009, 2011, 2014), by wave and wealth class
forvalues wave = 1/4 {
	forvalues cls = 1/4 {
		foreach stem in k Qk {
			xtile perc`stem'`wave'`cls' = `stem'`wave' if survey==`wave' & sclass==`cls', nq(100)
			replace `stem'`wave' = . if perc`stem'`wave'`cls'>=99 & sclass==`cls' & survey==`wave'
			drop perc`stem'`wave'`cls'
		}
	}
}

		* survey wave 5 (2018), targeted ultra-poor only: no split by wealth class
foreach v in k5 Qk5 {
	xtile perc`v' = `v' if survey==5, nq(100)
	replace `v' = . if perc`v'>=99 & survey==5
	drop perc`v'
}
		
	* log transforms: each new variable is named L + original name and holds log(value + 1)
foreach v of varlist k0* k1* k2* k3* k4* k5* savings* {
	gen L`v' = log(`v'+1)
}
foreach v of varlist Qk0 Qk1 Qk2 Qk3 Qk4 Qk5 {
	gen L`v' = log(`v'+1)
}

	* for waves 2 to 5: log difference relative to the wave-1 (post-transfer) asset stock
foreach v of varlist Lk2 Lk3 Lk4 Lk5 {
	gen delta`v' = `v' - Lk1
}
foreach v of varlist LQk2 LQk3 LQk4 LQk5 {
	gen delta`v' = `v' - LQk1
}
	
	 * trimmed panel variable - assets at constant prices (Qk)
	 * Percentile groups are computed within each survey round among stup==1 rows.
	 * nq(100) is required: xtile defaults to 2 groups, in which case the >=99
	 * cut-off can never match a computed group and no stup row is trimmed.
	 * NOTE(review): rows outside the xtile sample (stup not equal to 1, or Qk missing)
	 * get a missing percentile, and missing compares as >=99 in Stata, so Qk is also
	 * set to missing for those rows. That pre-existing behaviour is kept here -
	 * confirm it is intended.
gen Qk = QpAssets
forval i=1/5 {
	xtile percQk`i'=Qk if survey==`i' & stup==1, nq(100)
	replace Qk = . if percQk`i'>=99 & survey==`i'
	drop percQk`i'
}
label var Qk "1% trimmed PAssets at constant prices"



	* save data for reduced form analysis
save "$Data/PovertyTraps_analysis.dta", replace



**------------------------------------------------------------------------------
** PREPARE DATASET FOR STRUCTURAL ESTIMATION
**------------------------------------------------------------------------------
use "$Data/PovertyTraps_analysis.dta", clear 

set seed 190518
tempfile pf

* estimate production function (treatment villages only) -----------------------
keep if treat==1
	* pAssets was divided by 1000 when the analysis file was built; undo that here
replace pAssets=pAssets*1000
	* NOTE(review): trimmean is not a built-in command; presumably tri==0 flags
	* observations outside the 1 percent trimming bounds - confirm against the installed version
trimmean(pAssets), perc(1) gen(tri)
replace pAssets=. if tri==0
gen pAssets2=pAssets^2
	* pAssets3 is generated but not used by the model estimated below
gen pAssets3=pAssets^3
	* estimation sample: survey round 2 with non-missing assets and livestock hours
keep if survey==2
drop if pAssets==.
su livestock_hours_tot if stup==1
	* hiredInLabour: ot_ hours plus M_ hours when head_gender==1, ot_ hours only when
	* head_gender==0; any remaining missing value is set to zero
gen hiredInLabour = ot_livestock_hours_total + M_livestock_hours_total if head_gender==1
replace hiredInLabour=ot_livestock_hours_total  if head_gender==0
drop if livestock_hours_tot==.
replace hiredInLabour=0 if hiredInLabour==.
gen totalLabour=hiredInLabour + livestock_hours_tot 

	* estimate livestock income as a non-linear function of K (pAssets) and L (totalLabour):
	*   income = (b1 K + b2 K^2) L^b4
	* i.e. quadratic in K; all three parameters start at 0.5
nl (livestock_inc_tot=(({b1=0.5}*pAssets+{b2=0.5}*pAssets2)*(totalLabour)^{b4=0.5}))  

	* store the estimates as variables; nl parameters are read from _b[] under
	* their free-parameter names (a bare /b1 is not a valid expression)
gen b1=_b[/b1]
gen b2=_b[/b2]
gen b4=_b[/b4]

	* one row per branch; the estimates are constants, so the branch mean is the estimate itself
collapse b1 b2 b4 , by (branchid)

su b1 b2 b4
save `pf'

* main dataset: income, wage and hours -----------------------------------------
use "$Data/PovertyTraps_analysis.dta", clear 

	* savings rate = savings / (savings + pce_total x hhsize_adult_eq)
gen savRate=savings/(savings+(pce_total*hhsize_adult_eq))
	* hiredInLabour: ot_ hours plus M_ hours when head_gender==1, ot_ hours only when
	* head_gender==0; any remaining missing value is set to zero
gen hiredInLabour=ot_livestock_hours_total +M_livestock_hours_total if head_gender==1
replace hiredInLabour=ot_livestock_hours_total  if head_gender==0
replace hiredInLabour=0 if hiredInLabour==.

keep hiredInLabour savRate livestock_inc_tot agri_daylabor_inc_tot maid_inc_tot selfemp_inc_tot wage_inc_tot poultry_inc_tot livestock_hours_tot agri_daylabor_hours_tot maid_hours_tot selfemp_hr_tot wage_hr_tot poultry_hours_tot total_hours_work pAssets livestock_inc_per_hr agri_daylabor_inc_per_hr maid_inc_per_hr total_income_resp branchid spotno hhid5 survey k0 treat stup cows_no other_poor middle rich sclass

	* harmonise names: activity_Y = income, activity_H = hours, activity_R = income per hour.
	* Built-in group rename replaces the former two-step renvars postdrop/postfix calls
	* and yields the same names without needing the user-written renvars package.
rename *_inc_tot    *_Y
rename *_hours_tot  *_H
rename *_hr_tot     *_H
rename *_inc_per_hr *_R
rename k0 pAssets0
rename total_hours_work labour_H
rename total_income_resp labour_Y

	* overall income per hour worked (missing when labour_H is zero or missing)
gen labour_R= labour_Y/labour_H


	/* NOTE: wage_H unavailable for W4. Compute as sum of maid and agri_daylabor hours.
		We use only maid and ag labor, since those are the two main wage labor 
		occupations and total income in those allows us to compute hourly wage  */
		
replace wage_H = maid_H + agri_daylabor_H if wage_H==.										


	* mean hourly income in maid and agricultural day labour: by survey round and branch ...
bys survey branchid: egen maidW=mean(maid_R)
bys survey branchid: egen agri_daylaborW=mean(agri_daylabor_R)

	* ... and by survey round only
bys survey: egen maidWA=mean(maid_R)
bys survey: egen agri_daylaborWA=mean(agri_daylabor_R)

	* average of the two occupation means (rowmean ignores a missing component)
egen wageM=rowmean(maidW agri_daylaborW)
egen wageMA= rowmean(maidWA agri_daylaborWA)

	* share of total hours (labour_H) spent in each activity; labour_H itself is included
foreach v of varlist *_H {
	gen `v'share = `v'/labour_H
}

label var hiredInLabour "HH members hours in livestock"

foreach v of varlist *_Y {
	label var `v' "income in activity `v'"
}
foreach v of varlist *_R {
	label var `v' "income per hour in activity `v'"
}
foreach v of varlist *_H {
	label var `v' "hours in activity `v'"
}
foreach v of varlist *W {
	label var `v' "mean wage in `v', branch level"
}
foreach v of varlist *WA {
	label var `v' "mean wage in `v'"
}
label var wageM "average maid/aglab wage, branch level"
label var wageMA "average maid/aglab wage"


	/* merge dependency ratio (?)
	
		rename hhid5 hhid3
		merge m:1 hhid3 using "${Data}/allHHMembers_dependencyratio.dta"
		drop if _merge==2
		drop _merge
		//rename hhid3 hhid5
		
	*/
	
	/* threshold variables
	
		gen kHat=2.34
		gen loSavKHat=2.36
		gen hiSavKHat=2.28
		label var kHat "k threshold"
		label var loSavKHat "k threshold for low saving HH"
		label var hiSavKHat "k threshold for high saving HH"

	*/
	
	




	* merge production function parameters (b1 b2 b4, one row per branchid)
	* NOTE(review): the parameter file is built from treat==1 rows only, so branches
	* without such rows presumably end up with missing b1 b2 b4 - confirm this is intended
merge m:1 branchid using `pf', nogen

	* save data
	* The wave variable is listed as survey, the name used by every other command in
	* this file including the keep list; the former spelling survey_wave only resolves
	* if the dataset variable literally carries that longer name.
order branchid spotno treat survey stup selfemp_H selfemp_Y wage_H wage_Y maid_H maid_Y maid_R poultry_H poultry_Y livestock_H livestock_Y livestock_R agri_daylabor_H agri_daylabor_Y agri_daylabor_R cows_no labour_H labour_Y hhid5 pAssets pAssets0 savRate hiredInLabour labour_R maidW agri_daylaborW maidWA agri_daylaborWA wageM wageMA selfemp_Hshare wage_Hshare maid_Hshare poultry_Hshare livestock_Hshare agri_daylabor_Hshare labour_Hshare 
	
compress
save "$Data/structural/input/PovertyTraps_structural.dta", replace 





* additional datasets ----------------------------------------------------------

	* HH wealth classes

	* wage hours by branch 
	
	* specially targeted ultra-poor (stup) IDs 
	
	* wave 5 only


**------------------------------------------------------------------------------
** PREPARE DATASET FOR SHAPE TEST AND P-SPLINE ESTIMATION
**------------------------------------------------------------------------------

use "$Data/PovertyTraps_analysis.dta", clear 
	* sample: wave-1 rows of treated stup households with a non-missing Lk3 and Lk1 of at most 3.
	* stup and treat are tested with ==1, as elsewhere in this file: a bare variable in a
	* condition is true for every nonzero value, including missing.
keep if survey==1 & stup==1 & treat==1 & Lk3!=. & Lk1<=3
	* export only the wave-1 and wave-3 log asset values, in that column order
keep Lk1 Lk3 
order Lk1 Lk3 
export delimited using "$stest/Test1data.csv", replace
 